Explore the reasons RRT events are called

Have Jan 2015 - Aug 2016 loaded in cluster.


In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime
from impala.util import as_pandas
from collections import defaultdict
from operator import itemgetter
import cPickle as pickle
%matplotlib notebook
plt.style.use('ggplot')

In [ ]:
from impala.dbapi import connect
conn = connect(host="mycluster.domain.com", port=my_impala_port_number)
cur = conn.cursor()
cur.execute("use my_db")

function definitions


In [ ]:
def find_rrt_freq(reasons):
    '''
    reasons is a python list of RRT reasons -- 1 line for each RRT event.
    Each entry is a different RRT event; there may be multiple reasons per event
    output: a pandas dataframe with the counts for each reason.
    '''

    rrt_reasons = defaultdict(int)
    
    for reason in reasons:
        otherreason = ''
        if reason.lower().startswith("other:"):
            # if the line starts with "other" --> the only reason is the otherreason(s)
            otherreason = reason.lower().split('other:')[1]
            rrts = []
        else:
            # if the line contains "other:" or not
            splitreason = reason.lower().split('other:')
            if len(splitreason) > 1:
                # if an "other" reason exists, process it differently
                otherreason = splitreason[1] # text of the line after 'other:'
            primaryreason = splitreason[0].strip().strip('"')
            rrts = primaryreason.split(',')
    
        for rrt in rrts:
            rrt = rrt.strip()
            # loop through list of rrt reasons for patient & add to count tracker
            if len(rrt) > 0:
                # included len check b/c splitting on "other" above caused trailing comma
                if rrt not in rrt_reasons.keys():
                    rrt_reasons[rrt] = 1
                else:
                    rrt_reasons[rrt] += 1
    
        if len(otherreason) > 0:
            # handle the "other" reason(s)
            otherreason = "other: " + otherreason.strip().strip('"')
            if otherreason not in rrt_reasons.keys():
                rrt_reasons[otherreason] = 1
            else:
                rrt_reasons[otherreason] += 1
                
    return pd.DataFrame(rrt_reasons, index=['count']).transpose().sort_values('count', ascending=False).reset_index()

In [ ]:
def count_others(reasons):
    '''
    Count how many "Other" reasons there are, both occurring alone & with other reasons.
    "other_counts" is a dict which contains "only_other" & "other_withothers" as keys
    '''
    
    other_counts = defaultdict(int)
    other_counts['only_other'] = 0
    other_counts['other_withothers'] = 0

    for reason in reasons:
        if 'other' in reason.lower():
            if reason.lower().startswith('other'):
                other_counts['only_other'] += 1
            else:
                other_counts['other_withothers'] +=1
    
    print other_counts

In [ ]:
def count_staffconcern(reasons):
    '''
    input: list of reasons
    Counts how many time staff concern line happens, both by itself and with other reasons.
    '''

    staff_counts = defaultdict(int)
    staff_counts['by_itself'] = 0
    staff_counts['with_other_reasons'] = 0

    for reason in reasons:
        if 'staff concern' in reason.lower():
            if 'patient,' in reason.lower():
                staff_counts['with_other_reasons'] += 1
            else:
                staff_counts['by_itself'] += 1
    
    print staff_counts

In [ ]:
def avg_num_reasons(reasons):
    '''
    input: list of reasons; each entry is an RRT reason
    ouput: average number of reasons per RRT call.
    '''
    reasoncount = 0.0
    rrtcount = 0.0
    
    for entry in reasons:
        reasoncount += len(entry.split(','))
        rrtcount +=1
        
    return reasoncount/rrtcount

Query impala for rrt reasons

only looking at valid events (ce.valid_until_dt_tm > 4e12) & complete encounters for inpatients (enc.encntr_type_class_cd='391') at Main Hospital (enc.loc_facility_cd='633867')


In [ ]:
query = '''
SELECT ce.event_tag
FROM encounter enc
INNER JOIN clinical_event ce
ON enc.encntr_id = ce.encntr_id
WHERE enc.loc_facility_cd='633867'
AND enc.encntr_complete_dt_tm < 4e12
AND ce.event_cd='54408578'
AND ce.result_status_cd NOT IN ('31', '36')
AND ce.valid_until_dt_tm > 4e12
AND ce.event_class_cd not in ('654645')
AND enc.admit_type_cd !='0'
AND enc.encntr_type_class_cd='391';
'''

cur.execute(query)
reasons = cur.fetchall()  # would read result into a list of tuples, e.g. [('Other: iv start',),(...)...]

In [ ]:
# look at the result
reasons

In [ ]:
# Make the tuple within a list a simple list
reasons = [reason[0] for reason in reasons]

In [ ]:
reasons

Number of reasons given (note, not all rrt events have reasons given; we have 2048 rrt events)


In [ ]:
len(reasons)

Number of reasons with "Other: ..." reason provided (user filled)


In [ ]:
count_others(reasons)

This means num_only_other/num_rrt_events => % of reasons have only a personnel-specified reason listed

Number of reasons including "Staff Concern" as a reason, by itself & with others


In [ ]:
count_staffconcern(reasons)

This means num_by_itself/num_rrt_events => % of all RRTs have no specific reason provided

Average number of reasons per RRT call:


In [ ]:
avg_num_reasons(reasons)

Count the occurrences -- what are the most frequent reasons?


In [ ]:
df_reasons = find_rrt_freq(reasons)

In [ ]:
df_reasons

Staff concern for patient is the top reason for RRT.

Visualize the top 15 reaons


In [ ]:
plt.figure(figsize=(12,8))
plt.tight_layout
val = df_reasons['count'][0:15]
pos = np.arange(15)+0.5 #bar centers on the y axis
plt.barh(-pos, val, align='center')
plt.yticks(-pos, df_reasons['index'][0:15])
plt.tick_params(direction='in', labelsize='16', pad=1)
plt.xlabel('Frequency of Reason', fontsize='16')
plt.title('Top Reasons for RRT Event, Jan 2015 - Aug 2016', fontsize = '16')
plt.tight_layout()

# Run the line below to save an image of the chart
plt.savefig('RRT_top15reasons.png')